%Image category classification using Bag of Features/Words

%Location of the compressed data set:
url = 'http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz';
%Store the output in a temporary folder:
outputFolder = fullfile(tempdir, 'caltech101'); %define output folder
if ~exist(outputFolder,'dir')
   %Download and extract only once; skip if the folder already exists.
   untar(url,outputFolder); 
end

%Note that for the bag of features approach to be effective, the majority of
%each image's area must be occupied by the subject of the category, for
%example, an object or a type of scene:
rootFolder = fullfile(outputFolder, '101_ObjectCategories');
categories = {'airplanes','ferry','laptop'};
imds = imageDatastore(fullfile(rootFolder, categories),'LabelSource','foldernames');
%You can easily inspect the number of images per category as well as
%category labels as shown below:
tbl = countEachLabel(imds);
%Balance the set: trim every category to the size of the smallest one.
minSetCount = min(tbl{:,2});
imds = splitEachLabel(imds, minSetCount, 'randomize');
tbl = countEachLabel(imds); %re-count after balancing
%30% of each label for training, the remainder held out.
%NOTE(review): 'validationsSet' looks like a typo for 'validationSet';
%kept as-is in case later code references this name.
[trainingSet,validationsSet] = splitEachLabel(imds, 0.3, 'randomize');
%Index of the first training example of each category (for inspection).
airplanes = find(trainingSet.Labels == 'airplanes',1);
ferry = find(trainingSet.Labels == 'ferry',1);
laptop = find(trainingSet.Labels == 'laptop',1);

%Create a visual vocabulary and train an image category classifier:
%Bag of words is a technique adapted to computer vision from the world of
%natural language processing. Since images do not actually contain discrete
%words, we first construct a vocabulary of SURF features representative of
%each image category.
%This is accomplished with a single call to the bagOfFeatures function, which:
% 1. extracts SURF features from all images in all image categories;
% 2. constructs the visual vocabulary by reducing the number of features through 
%    quantization of feature space using K-means clustering.
bag = bagOfFeatures(trainingSet);





%Training a convolutional neural network on the MATLAB digit image dataset.
%Path fixed: the shipped demo dataset lives under 'nndatasets/DigitDataset'
%(the original 'ndatasets/DigitalDataset' does not exist).
digitDatasetPath = fullfile(matlabroot,'toolbox','nnet','nndemos','nndatasets','DigitDataset');
digitData = imageDatastore(digitDatasetPath,'IncludeSubfolders',true,'LabelSource','foldernames');
%The datastore contains 10000 synthetic images of digits 0-9. The images
%are generated by applying random transformations to digit images created
%using different fonts. Each digit image is 28x28 pixels.
%Show 20 random sample images (the index variable was 'per' but read as
%'perm' in the original — a guaranteed runtime error; unified as 'idx'):
figure;
idx = randperm(10000,20);
for i=1:20
   subplot(4,5,i);
   imshow(digitData.Files{idx(i)});
end
%Display the per-label image counts (countEachLabel is a function, not a
%datastore method; the original 'digitalData.countEachLabels' would error).
countEachLabel(digitData)

trainingNumFiles = 750; %750 images per digit for training, rest for testing
rng(1); %for reproducibility
[trainDigitData,testDigitData] = splitEachLabel(digitData,trainingNumFiles,'randomize');
%Define the CNN architecture: input -> conv -> ReLU -> max-pool -> FC -> softmax.
layers = [imageInputLayer([28,28,1]);
          convolution2dLayer(5,20);
          reluLayer();
          maxPooling2dLayer(2,'Stride',2);
          fullyConnectedLayer(10); %10 classes: digits 0-9
          softmaxLayer();
          classificationLayer()];
%Stochastic gradient descent with momentum, 20 epochs.
options = trainingOptions('sgdm','MaxEpochs',20,'InitialLearnRate',0.0001);
convnet = trainNetwork(trainDigitData,layers,options);
%Evaluate on the held-out set (original referenced undefined
%'testDigitalData'; the variable created above is 'testDigitData').
YTest = classify(convnet,testDigitData);
TTest = testDigitData.Labels;
accuracy = sum(YTest == TTest)/numel(TTest);













